## Obtain Wind data
rm(list=ls())
options(scipen=100,digits=10)

library(rgdal)
library(gdalUtils)
library(raster)
library(dplyr)
library(sf)
library(tidyverse)
library(rgeos)
library(foreign)
library(RNCEP)
uwnd1 <- NCEP.gather(variable='uwnd', level=1000,     ## 1000 is ground pressure
                     months.minmax=c(10,12), years.minmax=c(2013,2018),
                     lat.southnorth=c(20,50), lon.westeast=c(-125,-60), ## Cover the whole US continent.
                     reanalysis2 = TRUE, return.units = TRUE)
uwnd1 <- NCEP.aggregate(wx.data=uwnd1, YEARS=TRUE, MONTHS=TRUE,
                        DAYS=TRUE, HOURS=FALSE, fxn='mean')
uwnd1 <- NCEP.array2df(uwnd1)

uwnd2 <- NCEP.gather(variable='uwnd', level=1000,     ## 1000 is ground pressure
                     months.minmax=c(1,3), years.minmax=c(2014,2019),
                     lat.southnorth=c(20,50), lon.westeast=c(-125,-60), ## Cover the whole US continent.
                     reanalysis2 = TRUE, return.units = TRUE)
uwnd2 <- NCEP.aggregate(wx.data=uwnd2, YEARS=TRUE, MONTHS=TRUE,
                        DAYS=TRUE, HOURS=FALSE, fxn='mean')
uwnd2 <- NCEP.array2df(uwnd2)

uwnd=rbind(uwnd1,uwnd2)

## Transfer datetime to date variable
uwnd$datetime=gsub("_", "-", uwnd$datetime)
uwnd$datetime=substr(uwnd$datetime,0, 10)
uwnd$datetime=as.Date(uwnd$datetime)

## Change log from E to W
uwnd$longitude=uwnd$longitude-360
names(uwnd)[4]="Uwind"
names(uwnd)[1]="DATE"

vwnd1 <- NCEP.gather(variable='vwnd', level=1000,     ## 1000 is ground pressure
                     months.minmax=c(10,12), years.minmax=c(2013,2018),
                     lat.southnorth=c(20,50), lon.westeast=c(-125,-60), ## Cover the whole US continent.
                     reanalysis2 = TRUE, return.units = TRUE)
vwnd1 <- NCEP.aggregate(wx.data=vwnd1, YEARS=TRUE, MONTHS=TRUE,
                        DAYS=TRUE, HOURS=FALSE, fxn='mean')
vwnd1 <- NCEP.array2df(vwnd1)

vwnd2 <- NCEP.gather(variable='vwnd', level=1000,     ## 1000 is ground pressure
                     months.minmax=c(1,3), years.minmax=c(2014,2019),
                     lat.southnorth=c(20,50), lon.westeast=c(-125,-60), ## Cover the whole US continent.
                     reanalysis2 = TRUE, return.units = TRUE)
vwnd2 <- NCEP.aggregate(wx.data=vwnd2, YEARS=TRUE, MONTHS=TRUE,
                        DAYS=TRUE, HOURS=FALSE, fxn='mean')
vwnd2 <- NCEP.array2df(vwnd2)

vwnd=rbind(vwnd1,vwnd2)

## Transfer datetime to date variable
vwnd$datetime=gsub("_", "-", vwnd$datetime)
vwnd$datetime=substr(vwnd$datetime,0, 10)
vwnd$datetime=as.Date(vwnd$datetime)

## Change log from E to W
vwnd$longitude=vwnd$longitude-360
names(vwnd)[1]="DATE"
names(vwnd)[4]="Vwind"

## Merge wind data
WIND=merge(uwnd,vwnd, by=c("DATE","latitude", "longitude"))
names(WIND)[2:3]=c("Latitude","Longitude")


## Coordinates are centroid of 2.5*2.5 degree grid.
grid=as.data.frame(unique(WIND[2:3]))
## Set GridID
grid$GridID=seq(1:nrow(grid))
WIND=merge(WIND, grid, by=c("Latitude","Longitude"), all.x=T)

## Get plant coordinates
setwd("data/raw data/PowerPlants_US_EIA_April2019")
plant=read.dbf("PowerPlants_US_201904.dbf")
plant=subset(plant, StateName!="Puerto Rico" & StateName!="Hawaii" & StateName!="Alaska")
plant=plant[c(1,30,31)]


lat_diff=as.data.frame(as.vector(abs(outer(plant$Latitude,grid$Latitude,"-"))))
names(lat_diff)[1]="LatDiff"
lat_diff$Plant_Code=rep(plant$Plant_Code, nrow(grid))
lat_diff$GridID=rep(grid$GridID, each=nrow(plant))


log_diff=as.data.frame(as.vector(abs(outer(plant$Longitude,grid$Longitude,"-"))))
names(log_diff)[1]="LogDiff"
log_diff$Plant_Code=rep(plant$Plant_Code, nrow(grid))
log_diff$GridID=rep(grid$GridID, each=nrow(plant))

Match=merge(lat_diff,log_diff, by=c("Plant_Code","GridID"))


Match$IN=0
Match$IN[which(abs(Match$LatDiff)<=1.25&abs(Match$LogDiff)<=1.25)]=1
match_in=subset(Match, IN==1)
## plant 56297 58867 59606 60469 are right on the boundry of grid.
match_list=match_in[1:2]

## Assign wind speed to each plant
WIND_Plant=merge(WIND, match_list, by="GridID")
WIND_Plant=WIND_Plant[4:7]

## plant 56297 58867 59606 60469 are right on the boundry of grid. Take ave of 2 grid
WIND_Plant1=aggregate(WIND_Plant, by=list(WIND_Plant$Plant_Code,WIND_Plant$DATE), FUN=mean) 
setwd("data/processed data/")
WIND_Plant1=WIND_Plant1[3:6]
write.csv(WIND_Plant1, "all_wind.csv")
